Abstract
Background We have run the simplified Naomi model using a range of inference methods.
Task In this report, we compare the accuracy of the posterior distributions obtained from these inference methods.
We compare the inference results from TMB, aghq and tmbstan.
Import these inference results as follows:
tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
tmbstan <- readRDS("depends/tmbstan.rds")
For more information about the conditions under which these results were generated, see:
depends <- yaml::read_yaml("orderly.yml")$depends
for(i in seq_along(depends)) {
report_name <- names(depends[[i]])
print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
print(orderly::orderly_info(report_id, report_name)$parameters)
}
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230114-173843-50cf86d7 and was run with the following parameters:"
## $tmb
## [1] TRUE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE)"
## [1] "Obtained report had ID 20230112-210550-b5292d7e and was run with the following parameters:"
## $aghq
## [1] TRUE
##
## $tmb
## [1] FALSE
##
## $k
## [1] 2
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
##
## $niter
## [1] 20000
##
## $nthin
## [1] 20
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $nsample
## [1] 1000
All of the possible parameter names are as follows:
unique(names(tmb$fit$obj$env$par))
## [1] "beta_rho" "beta_alpha" "beta_lambda" "beta_anc_rho" "beta_anc_alpha"
## [6] "logit_phi_rho_x" "log_sigma_rho_x" "logit_phi_rho_xs" "log_sigma_rho_xs" "logit_phi_rho_a"
## [11] "log_sigma_rho_a" "logit_phi_rho_as" "log_sigma_rho_as" "log_sigma_rho_xa" "u_rho_x"
## [16] "us_rho_x" "u_rho_xs" "us_rho_xs" "u_rho_a" "u_rho_as"
## [21] "logit_phi_alpha_x" "log_sigma_alpha_x" "logit_phi_alpha_xs" "log_sigma_alpha_xs" "logit_phi_alpha_a"
## [26] "log_sigma_alpha_a" "logit_phi_alpha_as" "log_sigma_alpha_as" "log_sigma_alpha_xa" "u_alpha_x"
## [31] "us_alpha_x" "u_alpha_xs" "us_alpha_xs" "u_alpha_a" "u_alpha_as"
## [36] "u_alpha_xa" "OmegaT_raw" "log_betaT" "log_sigma_lambda_x" "ui_lambda_x"
## [41] "log_sigma_ancrho_x" "log_sigma_ancalpha_x" "ui_anc_rho_x" "ui_anc_alpha_x" "log_sigma_or_gamma"
## [46] "log_or_gamma"
data.frame(
"TMB" = tmb$time,
"aghq" = aghq$time,
"tmbstan" = tmbstan$time
)
histogram_plot("beta_anc_rho")
ks_helper <- function(par) to_ks_df(par) %>% ks_plot(par = par)
ks_helper("beta")
ks_helper("logit")
ks_helper("log_sigma")
ks_helper("u_rho_x")
ks_helper("u_rho_xs")
ks_helper("us_rho_x")
ks_helper("us_rho_xs")
ks_helper("u_rho_a")
ks_helper("u_rho_as")
ks_helper("u_alpha_x")
ks_helper("u_alpha_xs")
ks_helper("us_alpha_x")
ks_helper("us_alpha_xs")
ks_helper("u_alpha_a")
ks_helper("u_alpha_as")
ks_helper("u_alpha_xa")
ks_helper("ui_anc_rho_x")
ks_helper("ui_anc_alpha_x")
ks_helper("log_or_gamma")
ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
to_ks_df(x) %>%
group_by(method) %>%
summarise(ks = mean(ks), par = x)
}) %>%
bind_rows() %>%
pivot_wider(names_from = "method", values_from = "ks") %>%
rename(
"Parameter" = "par",
"KS(aghq, tmbstan)" = "aghq",
"KS(TMB, tmbstan)" = "TMB",
)
ks_summary %>%
gt::gt() %>%
gt::fmt_number(
columns = starts_with("KS"),
decimals = 3
)
| Parameter | KS(aghq, tmbstan) | KS(TMB, tmbstan) |
|---|---|---|
| beta_rho | 0.094 | 0.096 |
| beta_alpha | 0.110 | 0.104 |
| beta_lambda | 0.053 | 0.074 |
| beta_anc_rho | 0.076 | 0.072 |
| beta_anc_alpha | 0.036 | 0.031 |
| logit_phi_rho_x | 0.425 | 0.536 |
| log_sigma_rho_x | 0.589 | 0.646 |
| logit_phi_rho_xs | 0.387 | 0.510 |
| log_sigma_rho_xs | 0.822 | 0.613 |
| logit_phi_rho_a | 0.828 | 0.552 |
| log_sigma_rho_a | 0.448 | 0.585 |
| logit_phi_rho_as | 0.823 | 0.569 |
| log_sigma_rho_as | 0.298 | 0.589 |
| log_sigma_rho_xa | 0.441 | 0.669 |
| u_rho_x | 0.096 | 0.094 |
| us_rho_x | 0.066 | 0.063 |
| u_rho_xs | 0.125 | 0.122 |
| us_rho_xs | 0.040 | 0.040 |
| u_rho_a | 0.092 | 0.095 |
| u_rho_as | 0.093 | 0.095 |
| logit_phi_alpha_x | 0.278 | 0.532 |
| log_sigma_alpha_x | 0.706 | 0.557 |
| logit_phi_alpha_xs | 0.291 | 0.552 |
| log_sigma_alpha_xs | 0.675 | 0.540 |
| logit_phi_alpha_a | 0.784 | 0.524 |
| log_sigma_alpha_a | 0.297 | 0.541 |
| logit_phi_alpha_as | 0.734 | 0.515 |
| log_sigma_alpha_as | 0.232 | 0.514 |
| log_sigma_alpha_xa | 0.708 | 0.627 |
| u_alpha_x | 0.100 | 0.096 |
| us_alpha_x | 0.089 | 0.091 |
| u_alpha_xs | 0.107 | 0.102 |
| us_alpha_xs | 0.105 | 0.106 |
| u_alpha_a | 0.091 | 0.083 |
| u_alpha_as | 0.119 | 0.109 |
| u_alpha_xa | 0.072 | 0.069 |
| OmegaT_raw | 0.196 | 0.518 |
| log_betaT | 0.166 | 0.689 |
| log_sigma_lambda_x | 0.547 | 0.737 |
| ui_lambda_x | 0.159 | 0.162 |
| log_sigma_ancrho_x | 0.714 | 0.504 |
| log_sigma_ancalpha_x | 0.705 | 0.519 |
| ui_anc_rho_x | 0.056 | 0.056 |
| ui_anc_alpha_x | 0.066 | 0.065 |
| log_sigma_or_gamma | 0.500 | 0.524 |
| log_or_gamma | 0.061 | 0.061 |
ggplot(ks_summary, aes(x = `KS(TMB, tmbstan)`, y = `KS(aghq, tmbstan)`)) +
geom_point(alpha = 0.5) +
xlim(0, 1) +
ylim(0, 1) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
labs(x = "KS(aghq, tmbstan)", y = "KS(TMB, tmbstan)") +
theme_minimal()
ks_summary %>%
mutate(`KS difference` = `KS(TMB, tmbstan)` - `KS(aghq, tmbstan)`) %>%
ggplot(aes(x = `KS difference`)) +
geom_boxplot(width = 0.5) +
coord_flip() +
labs(x = "KS(TMB, tmbstan) - KS(aghq, tmbstan)") +
theme_minimal()
#' To write!
Suppose we have two sets of samples from the posterior.
For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios.
We can extract the TMB objective function for the log-likelihood as follows:
tmb$fit$obj$fn()
## [1] NaN